In [8]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
In [9]:
from keras import backend
# Select the 'th' (Theano-style) image dimension ordering for Keras.
# NOTE(review): presumably required by the Vgg16 helper used below -- confirm
# against vgg16.py.
backend.set_image_dim_ordering('th')
Test submission to the "Dogs vs. Cats Redux: Kernels Edition" Kaggle competition
https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition
In [10]:
import zipfile
import tempfile
import os
In [11]:
# Scratch directory that receives the extracted archives and every derived
# train/valid/test/sample folder. mkdtemp() creates it; nothing in this
# notebook removes it afterwards.
tmp_dir = tempfile.mkdtemp()
tmp_dir  # display the generated path
Out[11]:
In [12]:
# Unpack the training archive into the scratch directory.
# The `with` block closes the archive even if extraction fails; the previous
# `zf.close` (no parentheses) only referenced the method and never called it.
with zipfile.ZipFile("../data/redux/train.zip") as zf:
    zf.extractall(tmp_dir)
Out[12]:
In [13]:
# Unpack the test archive into the scratch directory.
# The `with` block closes the archive even if extraction fails; the previous
# `zf.close` (no parentheses) only referenced the method and never called it.
with zipfile.ZipFile("../data/redux/test.zip") as zf:
    zf.extractall(tmp_dir)
Out[13]:
In [14]:
import sys
# Make the modules that live in ../../nbs importable (utils here, and
# presumably vgg16 further down).
sys.path.append('../../nbs')
import utils
# NOTE(review): names used later without an explicit import (glob, np, plots)
# presumably come from this star-import -- confirm against utils.py.
from utils import *
In [15]:
import random
import shutil
# Every .jpg directly under <tmp_dir>/train; later cells shuffle and slice this
# list to build the sample and validation sets.
g = glob(os.path.join(tmp_dir, 'train', '*.jpg'))
In [16]:
# Build the sample set: copy 100 randomly chosen training images into
# <tmp_dir>/sample/train (the originals stay where they are).
smp_dir = os.path.join(tmp_dir, 'sample')
sample_train_dir = os.path.join(smp_dir, 'train')
os.makedirs(sample_train_dir)
random.shuffle(g)  # in place, so the first 100 entries are a random pick
for src in g[:100]:
    shutil.copy(src, os.path.join(sample_train_dir, os.path.basename(src)))
In [17]:
# Build the validation set: move 3750 randomly chosen files from the list `g`
# into <tmp_dir>/valid.
full_valid_dir = os.path.join(tmp_dir, 'valid')
os.makedirs(full_valid_dir)
random.shuffle(g)  # reshuffle so the pick is independent of the sample pick
for src in g[:3750]:
    shutil.move(src, os.path.join(full_valid_dir, os.path.basename(src)))
In [18]:
# Build the sample validation set: move 20 randomly chosen images out of
# <smp_dir>/train into <smp_dir>/valid.
g = glob(os.path.join(smp_dir, 'train', '*.jpg'))
random.shuffle(g)
sample_valid_dir = os.path.join(smp_dir, 'valid')
os.makedirs(sample_valid_dir)
for src in g[:20]:
    shutil.move(src, os.path.join(sample_valid_dir, os.path.basename(src)))
In [19]:
# Build the sample test set: copy 20 randomly chosen test images into
# <smp_dir>/test/class (a single placeholder sub-folder).
g = glob(os.path.join(tmp_dir, 'test', '*.jpg'))
random.shuffle(g)
sample_test_dir = os.path.join(smp_dir, 'test', 'class')
os.makedirs(sample_test_dir)
for src in g[:20]:
    shutil.copy(src, os.path.join(sample_test_dir, os.path.basename(src)))
In [20]:
# Move every test image into <tmp_dir>/test/class so the image batch generator
# finds them inside a sub-folder.
g = glob(os.path.join(tmp_dir, 'test', '*.jpg'))
full_test_dir = os.path.join(tmp_dir, 'test', 'class')
os.makedirs(full_test_dir)
for src in g:
    shutil.move(src, os.path.join(full_test_dir, os.path.basename(src)))
In [21]:
# separate classes
def sepClasses(path):
    """Sort the images directly inside `path` into per-class sub-folders.

    Files matching ``cat*.jpg`` are moved to ``<path>/cats`` and files
    matching ``dog*.jpg`` to ``<path>/dogs``. The call is safe to repeat:
    existing class folders are reused instead of raising ``FileExistsError``.

    Args:
        path: Directory holding the unsorted ``cat*.jpg`` / ``dog*.jpg`` files.

    Raises:
        FileNotFoundError: If `path` itself does not exist.
    """
    for prefix, class_name in (('cat', 'cats'), ('dog', 'dogs')):
        class_dir = os.path.join(path, class_name)
        # mkdir (not makedirs) so a mistyped `path` still fails loudly instead
        # of silently creating an empty directory tree.
        if not os.path.isdir(class_dir):
            os.mkdir(class_dir)
        for file in glob(os.path.join(path, prefix + '*.jpg')):
            shutil.move(file, os.path.join(class_dir, os.path.basename(file)))
In [22]:
# Split train and valid into cats/ and dogs/ sub-folders, first for the sample
# set and then for the full set.
for base_dir in (smp_dir, tmp_dir):
    for subset in ('train', 'valid'):
        sepClasses(os.path.join(base_dir, subset))
In [23]:
# Choose which data set the training cells below read from: swap the two
# assignments to switch between the small sample and the full data.
path = smp_dir # for sample train
# path = tmp_dir # for real train
In [24]:
# Small batches for the sample set, larger ones for the full data set.
batch_size = 5 if path == smp_dir else 64
In [25]:
import importlib
import vgg16; importlib.reload(vgg16)
from vgg16 import Vgg16
In [30]:
# Instantiate the model and create batch iterators over the train and valid
# folders of the selected data set.
vgg = Vgg16()
train_path = os.path.join(path, 'train')
valid_path = os.path.join(path, 'valid')
batches = vgg.get_batches(train_path, batch_size=batch_size)
val_batches = vgg.get_batches(valid_path, batch_size=batch_size)
In [32]:
# Prepare the model for training on `batches`.
# NOTE(review): presumably adapts the output layer to the classes found in the
# batch iterator -- confirm against Vgg16.finetune.
vgg.finetune(batches)
In [33]:
# keras output problem workaround
# Send everything printed from here on to the file keras.out; the current
# stream is kept in oldStdout so that a later cell can restore it.
# NOTE(review): running this cell twice without restoring in between stores
# the log file in oldStdout and loses the notebook's real stdout.
oldStdout = sys.stdout
sys.stdout = open("keras.out", 'w')
In [34]:
# Train for 2 epochs, validating on val_batches. While sys.stdout is
# redirected, the printed progress ends up in keras.out instead of the notebook.
vgg.fit(batches, val_batches, nb_epoch=2)
In [35]:
# keras output problem workaround
# Restore the stream saved in oldStdout, then close the stream that was active
# during training so its buffer is flushed and the handle is released
# (previously it was left open for the life of the kernel).
keras_log = sys.stdout
sys.stdout = oldStdout
# Guard against this cell being run twice: never close the restored stream.
if keras_log is not oldStdout:
    keras_log.close()
In [36]:
# Save the trained model, using a separate file name for the sample run so it
# does not overwrite the full model.
model_name = 'sample_model.h5' if path == smp_dir else 'model.h5'
vgg.model.save(model_name)
In [76]:
# load model
# vgg.model.load_weights('sample_model.h5')
# vgg.model.load_weights('model.h5')
In [77]:
# Pull one batch from the validation iterator for a visual spot check.
imgs,labels = next(val_batches)
In [78]:
# Show the batch images with their labels as titles.
# (`plots` is not imported explicitly -- presumably provided by `from utils import *`.)
plots(imgs, titles=labels)
In [79]:
# Run the model on the same batch to compare against the labels plotted above.
# NOTE(review): the meaning of the second positional argument (True) is not
# visible here -- see Vgg16.predict.
vgg.predict(imgs, True)
Out[79]:
In [95]:
# Predict on the test images under <tmp_dir>/test.
# NOTE(review): this always reads the full test set from tmp_dir, even when
# `path` is smp_dir; the sample test set created under <smp_dir>/test is never
# used. Confirm whether os.path.join(path, 'test') was intended.
test_batches, pred = vgg.test(os.path.join(tmp_dir, 'test'), batch_size=5)
In [96]:
# File names as reported by the test batch iterator; presumably in the same
# order as the rows of `pred` -- TODO confirm the iterator does not shuffle.
filenames = test_batches.filenames
In [97]:
# Derive each submission id from its file name: drop the directory part and
# the extension.
ids = []
for filename in filenames:
    stem, _ext = os.path.splitext(os.path.basename(filename))
    ids.append(stem)
In [100]:
# Two-column array for the submission: the id and column 1 of `pred`.
# NOTE(review): column 1 is presumably the 'dogs' probability (class folders
# cats/dogs in alphabetical order) -- confirm against the batch iterator's
# class indices. Stacking the string ids with the numeric predictions makes
# NumPy store both columns as strings.
submission = np.stack([ids, pred[:,1]], axis=1)
submission
Out[100]:
In [101]:
# Write the submission file, one "id,label" row per test image.
# comments='' is required: by default np.savetxt prefixes the header with
# '# ', producing "# id,label", which is not a valid CSV header row.
np.savetxt('submission.csv', submission, header='id,label', fmt='%s,%s', comments='')
In [ ]: